#### Compare various measures of coffee prices

## Setup environment

setwd("~/research/coffee-dataverse/src")

library(readxl)
library(ggplot2)
library(dplyr)

### Read in all price data

## World Bank Pink Sheet commodity data

all.prices <- read_xlsx("../data/CMO-Historical-Data-Annual.xlsx", sheet=4, skip=6)
prices <- all.prices[-1, c('...1', 'Coffee, Arabica', 'Coffee, Robusta')]
names(prices) <- c('year', 'price.arabica', 'price.robusta')
prices$price.arabica <- as.numeric(as.character(prices$price.arabica)) * 100
prices$price.robusta <- as.numeric(as.character(prices$price.robusta)) * 100

df.prices <- data.frame(year=prices$year, price=c(prices$price.arabica, prices$price.robusta), species=rep(c('Arabica', 'Robusta'), each=nrow(prices)), contract='WB Pink Sheet')

## Prices paid to growers

grow.hist <- read.csv("../data/growprices.csv")
subset(grow.hist, Country == "Brazil")
df.grow.hist <- data.frame(year=1965:2013, grow.arabica=as.numeric(subset(grow.hist, Country == "Brazil" & Type == "Brazilian Naturals")[3:ncol(grow.hist)]), grow.robusta=as.numeric(subset(grow.hist, Country == "Brazil" & Type == "Robustas")[3:ncol(grow.hist)]))

grow.recent <- read_xlsx("../data/3a - Prices paid to growers.xlsx", skip=2)
df.grow.recent <- data.frame(year=1990:2019, grow.arabica=as.numeric(grow.recent[grep('Brazil', grow.recent$`Calendar years`),][2, 2:ncol(grow.recent)]), grow.robusta=as.numeric(grow.recent[grep('Brazil', grow.recent$`Calendar years`),][3, 2:ncol(grow.recent)]))

df.grow.wide <- rbind(subset(df.grow.hist, year < 1990), df.grow.recent)
df.grow <- data.frame(year=df.grow.wide$year, price=c(df.grow.wide$grow.arabica, df.grow.wide$grow.robusta), species=rep(c('Arabica', 'Robusta'), each=nrow(df.grow.wide)), contract='ICO Price to Growers')

## Futures prices

future.prices <- read.csv("../data/US Coffee C Futures Historical Data.csv")
future.prices$year <- sapply(future.prices$Date, function(ss) as.numeric(substring(ss, 7, 10)))
df.future.arabica <- future.prices %>% group_by(year) %>% summarize(price=mean(Price))

future.prices <- read.csv("../data/London Robusta Coffee Futures Historical Data.csv")
future.prices$year <- sapply(future.prices$Date, function(ss) as.numeric(substring(ss, 7, 10)))
future.prices$price <- sapply(future.prices$Price, function(ss) as.numeric(gsub(',', '', ss))) / 10
df.future.robusta <- future.prices %>% group_by(year) %>% summarize(price=mean(price))

df.future <- cbind(rbind(cbind(df.future.arabica, species='Arabica'), cbind(df.future.robusta, species='Robusta')), contract='Coffee Futures')

## Combine all data

df <- rbind(df.prices, df.grow, df.future)
df <- subset(df, year >= 1985 & year <= 2020)
df$contract <- factor(df$contract, levels=c("WB Pink Sheet", "ICO Price to Growers", "Coffee Futures"))

## Normalize to 2015
df2 <- df %>% group_by(species, contract) %>% summarize(year=year, price=price / mean(price[year >= 2010 & year <= 2019]))

## Produce graph

ggplot(df2, aes(year, price, colour=contract)) +
    facet_wrap(~ species) +
    geom_line() +
    coord_cartesian(ylim=c(0, 2)) +
    scale_x_continuous(expand=c(0, 0)) + theme_bw() +
    ylab("Price relative to 2010-2019 average") + xlab(NULL) + scale_colour_discrete("Price measure:") +
    theme(panel.spacing = unit(1, "lines"))
ggsave("../figures/data/price-compare-time.pdf", width=6.5, height=4)

## Produce scatterplot matrices

library(GGally)
library(reshape2)

df3 <- df %>% group_by(species, contract) %>% mutate(dprice=c(NA, diff(price)))
df4 <- dcast(df3, year ~ species + contract, value.var='dprice')[, -1]
names(df4) <- c('Arabica Futures', 'Arabica Farm-gate', 'Arabica Commod.', 'Robusta Futures', 'Robusta Farm-gate', 'Robusta Commod.')
ggpairs(df4)
ggsave("../figures/data/price-compare-matrix.pdf", width=8, height=8)

df5 <- df4[, 1:3]
names(df5) <- c('Futures', 'Farm-gate', 'Commodity')
ggpairs(df5) + theme_bw()
ggsave("../figures/data/price-compare-matrix-arabica.pdf", width=4, height=4)

df5 <- df4[, 4:6]
names(df5) <- c('Futures', 'Farm-gate', 'Commodity')
ggpairs(df5) + theme_bw()
ggsave("../figures/data/price-compare-matrix-robusta.pdf", width=4, height=4)
